The objective of this notebook is to perform a computational dissection of the main differentiation pathway that CD4-T cells follow using the most variable features of our dataset.
library(Seurat)
library(Signac)
library(GenomicRanges)
library(dplyr)
library(ggplot2)
library(tidyverse)
library(data.table)
library(ggpubr)
library(xlsx)
library(plyr)
library(stringr)
cell_type <- "CD4_T"
path_to_obj <- paste0(
here::here("scATAC-seq/results/R_objects/level_5/"),
cell_type,
"/04.",
cell_type,
"_integration_peak_calling_level_5.rds",
sep = ""
)
color_palette <- c("#1CFFCE", "#90AD1C", "#C075A6", "#85660D",
"#5A5156", "#AA0DFE", "#F8A19F", "#F7E1A0",
"#1C8356", "#FEAF16", "#822E1C", "#C4451C",
"#1CBE4F", "#325A9B", "#F6222E", "#FE00FA",
"#FBE426", "#16FF32", "black", "#3283FE",
"#B00068", "#DEA0FD", "#B10DA1", "#E4E1E3",
"#90AD1C", "#FE00FA", "#85660D", "#3B00FB",
"#822E1C", "coral2", "#1CFFCE", "#1CBE4F",
"#3283FE", "#FBE426", "#F7E1A0", "#325A9B",
"#2ED9FF", "#B5EFB5", "#5A5156", "#DEA0FD",
"#FEAF16", "#683B79", "#B10DA1", "#1C7F93",
"#F8A19F", "dark orange", "#FEAF16",
"#FBE426", "Brown")
plot_dim <- function(seurat, group,color_palette){
DimPlot(seurat,
group.by = group,
cols = color_palette,
pt.size = 0.1)}
seurat <- readRDS(path_to_obj)
seurat
## An object of class Seurat
## 93116 features across 16383 samples within 1 assay
## Active assay: peaks_level_5 (93116 features, 92407 variable features)
## 3 dimensional reductions calculated: umap, lsi, harmony
seurat_peaks <- seurat@assays$peaks_level_5@ranges
#pdf(file = here::here("scATAC-seq/results/plots/CD4-T/UMAP_final_annotation.pdf"),
# width = 6, height = 4)
print(plot_dim(seurat, group = "annotation_paper",color_palette))
#dev.off()
At low level of resolution, we want to detect the main epigenomic changes between Non-Tfh vs Tfh. For this reason, we decide to group the cells in 4 clusters: Non-Tfh, Tfh, Central Memory and Naive.
seurat@meta.data <- seurat@meta.data %>% mutate(Group =
case_when(annotation_paper == "Naive" ~ "Naive",
annotation_paper == "CM Pre-non-Tfh" ~ "Central Memory",
annotation_paper == "CM PreTfh" ~ "Central Memory",
annotation_paper == "T-Trans-Mem" ~ "Non-Tfh",
annotation_paper == "T-Eff-Mem" ~ "Non-Tfh",
annotation_paper == "T-helper" ~ "Non-Tfh",
annotation_paper == "Tfh T:B border" ~ "Tfh",
annotation_paper == "Tfh-LZ-GC" ~ "Tfh",
annotation_paper == "GC-Tfh-SAP" ~ "Tfh",
annotation_paper == "GC-Tfh-0X40" ~ "Tfh",
annotation_paper == "Tfh-Mem" ~ "Tfh",
annotation_paper == "Memory T cells" ~ "Non-Tfh",
annotation_paper == "Eff-Tregs" ~ "Non-Tfh",
annotation_paper == "non-GC-Tf-regs" ~ "Non-Tfh",
annotation_paper == "GC-Tf-regs" ~ "Non-Tfh"))
#pdf(file = here::here("scATAC-seq/results/plots/CD4-T/UMAP_main_groups.pdf"),
# width = 6,
# height = 4)
print(plot_dim(seurat, group = "Group",
color_palette = c("black", "gray", "red", "yellow")))
#dev.off()
We have seen that the relation of the second and the third components of the LSI reduction reflects the cell-fate decision of the naïve CD4-T cell towards Non-Tfh or Tfh. In order to verify this effect in detail, we decided to extract the feature.loadings of these two components and filter by the high variable peaks that could be explain this differention process. Using those peaks, we can re-compute the UMAP and see how the trajectory appears clearer.
a1 <- DimPlot(
seurat,
group.by = "Group",
cols = color_palette,
pt.size = 0.4,
reduction = "lsi",
dims = 2:3
)
a2 <- DimPlot(
seurat,
group.by = "annotation_paper",
cols = color_palette,
pt.size = 0.4,
reduction = "lsi",
dims = 2:3
)
a2
a1
loading_2_3 <- seurat@reductions$lsi@feature.loadings[,c("LSI_2","LSI_3")]
loading_2_3.df <- as.data.frame(seurat@reductions$lsi@feature.loadings[,c("LSI_2","LSI_3")])
loading_2_3.melt <- melt(loading_2_3)
ggviolin(loading_2_3.melt, "Var2", "value",
fill = "Var2", palette = c("#00AFBB", "#E7B800"),
add = "boxplot", add.params = list(fill = "white")) +
geom_hline(yintercept = c(-0.015,0.015))
# filter dataframe to get data to be highligheted
loadings_high <- loading_2_3.melt[loading_2_3.melt$value > 0.015 | loading_2_3.melt$value < -0.015,]$Var1
highlight_df <- loading_2_3.df[loadings_high,]
loading_2_3.df %>%
ggplot(aes(x=LSI_2,y=LSI_3)) +
geom_point(alpha=0.3) +
geom_point(data=highlight_df,
aes(x=LSI_2,y=LSI_3),
color='red',
size=2) + theme_minimal()
We can re-compute the UMAP using the high variable features previously defined.
loadings_high_UMAP <- RunUMAP(object = seurat,
features = loadings_high)
#saveRDS(object = loadings_high_UMAP,
# file = here::here("scATAC-seq/results/plots/CD4-T/files_plots/loadings_high_UMAP.rds"))
#pdf(file = here::here("scATAC-seq/results/plots/CD4-T/UMAP_HVF_4groups.pdf"),
# width = 6, height = 4)
DimPlot(object = loadings_high_UMAP,
label = F,group.by = "Group",
pt.size = 0.3,
cols = c("black", "gray", "red", "yellow"))
#dev.off()
DimPlot(ncol = 4, object = loadings_high_UMAP,
label = F,split.by = "Group",
pt.size = 0.3,
cols = color_palette) + NoLegend()
sessionInfo()
## R version 4.0.3 (2020-10-10)
## Platform: x86_64-apple-darwin13.4.0 (64-bit)
## Running under: macOS Big Sur 10.16
##
## Matrix products: default
## BLAS/LAPACK: /Users/pauli/opt/anaconda3/envs/Motif_TF/lib/libopenblasp-r0.3.10.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] parallel stats4 stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] plyr_1.8.6 xlsx_0.6.5 ggpubr_0.4.0 data.table_1.14.0 forcats_0.5.0 stringr_1.4.0 purrr_0.3.4 readr_1.4.0 tidyr_1.1.3 tibble_3.1.2 tidyverse_1.3.0 ggplot2_3.3.5 dplyr_1.0.7 GenomicRanges_1.40.0 GenomeInfoDb_1.24.2 IRanges_2.22.1 S4Vectors_0.26.0 BiocGenerics_0.34.0 Signac_1.2.1 SeuratObject_4.0.2 Seurat_4.0.3 BiocStyle_2.16.1
##
## loaded via a namespace (and not attached):
## [1] utf8_1.2.1 reticulate_1.20 tidyselect_1.1.1 htmlwidgets_1.5.3 grid_4.0.3 docopt_0.7.1 BiocParallel_1.22.0 Rtsne_0.15 munsell_0.5.0 codetools_0.2-17 ica_1.0-2 future_1.21.0 miniUI_0.1.1.1 withr_2.4.2 colorspace_2.0-2 knitr_1.30 rstudioapi_0.11 ROCR_1.0-11 ggsignif_0.6.0 tensor_1.5 rJava_0.9-13 listenv_0.8.0 labeling_0.4.2 slam_0.1-47 GenomeInfoDbData_1.2.3 polyclip_1.10-0 farver_2.1.0 rprojroot_2.0.2 parallelly_1.26.1 vctrs_0.3.8 generics_0.1.0 xfun_0.18 lsa_0.73.2 ggseqlogo_0.1 R6_2.5.0 bitops_1.0-7 spatstat.utils_2.2-0 assertthat_0.2.1 promises_1.2.0.1 scales_1.1.1 gtable_0.3.0 globals_0.14.0 goftest_1.2-2 rlang_0.4.11 RcppRoll_0.3.0 splines_4.0.3 rstatix_0.6.0 lazyeval_0.2.2 spatstat.geom_2.2-0 broom_0.7.2 BiocManager_1.30.10
## [52] yaml_2.2.1 reshape2_1.4.4 abind_1.4-5 modelr_0.1.8 backports_1.1.10 httpuv_1.6.1 tools_4.0.3 bookdown_0.21 ellipsis_0.3.2 spatstat.core_2.2-0 RColorBrewer_1.1-2 ggridges_0.5.3 Rcpp_1.0.6 zlibbioc_1.34.0 RCurl_1.98-1.2 rpart_4.1-15 deldir_0.2-10 pbapply_1.4-3 cowplot_1.1.1 zoo_1.8-9 haven_2.3.1 ggrepel_0.9.1 cluster_2.1.0 here_1.0.1 fs_1.5.0 magrittr_2.0.1 RSpectra_0.16-0 scattermore_0.7 openxlsx_4.2.4 lmtest_0.9-38 reprex_0.3.0 RANN_2.6.1 SnowballC_0.7.0 fitdistrplus_1.1-5 matrixStats_0.59.0 xlsxjars_0.6.1 hms_0.5.3 patchwork_1.1.1 mime_0.11 evaluate_0.14 xtable_1.8-4 rio_0.5.16 sparsesvd_0.2 readxl_1.3.1 gridExtra_2.3 compiler_4.0.3 KernSmooth_2.23-17 crayon_1.4.1 htmltools_0.5.1.1 mgcv_1.8-33 later_1.2.0
## [103] lubridate_1.7.9 DBI_1.1.0 tweenr_1.0.1 dbplyr_1.4.4 MASS_7.3-53 Matrix_1.3-4 car_3.0-10 cli_3.0.0 igraph_1.2.6 pkgconfig_2.0.3 foreign_0.8-80 plotly_4.9.4.1 spatstat.sparse_2.0-0 xml2_1.3.2 XVector_0.28.0 rvest_0.3.6 digest_0.6.27 sctransform_0.3.2 RcppAnnoy_0.0.18 spatstat.data_2.1-0 Biostrings_2.56.0 rmarkdown_2.5 cellranger_1.1.0 leiden_0.3.8 fastmatch_1.1-0 uwot_0.1.10 curl_4.3.2 shiny_1.6.0 Rsamtools_2.4.0 lifecycle_1.0.0 nlme_3.1-150 jsonlite_1.7.2 carData_3.0-4 viridisLite_0.4.0 fansi_0.5.0 pillar_1.6.1 lattice_0.20-41 fastmap_1.1.0 httr_1.4.2 survival_3.2-7 glue_1.4.2 qlcMatrix_0.9.7 zip_2.1.1 png_0.1-7 ggforce_0.3.2 stringi_1.6.2 blob_1.2.1 irlba_2.3.3 future.apply_1.7.0